#!/bin/bash
################################################################################
#  Copyright (C) 2015-2018 IQIYI All rights reserved.
# 
#  Author     : 王海斌
#  E-mail     : wanghaibin@qiyi.com
#  Version    :
#  Date       : Thu 10 Jun 2021 10:00:03 AM CST
#  Description: 监控doh-server相关句柄数是否正常，每5秒输出一次相关内容
#
#             1.启动监控方法：
#                      # key 一般为ps -ef 结果的第八项完整内容
#                      nohup ./watch_fd watch key stdout > key.log &
#             2.处理日志中的rss字段, 生成key.log.txt数据文件：
#                      ./watch_fd deal_log_rss key.log
#
################################################################################
# Run from the directory that holds this script, so that relative paths (for
# example the default "<key>.watch" log file) resolve against that directory.
cd "$(dirname -- "$0")" || exit 1

global_proc_ids=()       # PIDs of the watched processes (set by fun_get_latest_pids)
sleep_idle=5             # seconds to sleep between two sampling rounds
global_outSeq=1          # sampling-round counter
global_procInfoInt=20    # log the process start info once every this many rounds

startTime=0              # start time (epoch seconds) written by GetProcStartTime

# Post-process a log written by the "watch" mode: extract every VmRSS sample
# (kB) and write the minimum of each group of 12 consecutive samples (one
# minute at a 5-second interval) to "<log>.txt", one value per line.
# A trailing incomplete group is reduced the same way.
# Arguments: $1 - path of the watch log file
# Outputs:   creates/overwrites "$1.txt"; when $1 is empty prints a usage hint
#            to stdout and exits 0
function deal_log_rss() {
    local local_file=$1
    if [ -z "${local_file}" ]; then
        echo "输入 log 日志文件名, 如 ./txt.sh aaa.log"
        exit 0
    fi
    # number of samples folded into one output value
    local sample_count=12
    awk -v a_sample="${sample_count}" '
        # Only lines that carry a numeric RSS value are samples.
        match($0, /VmRSS= *[0-9]+/) {
            # Adding 0 forces a number, so the minimum is chosen numerically
            # and not by string comparison.
            val = substr($0, RSTART + 6, RLENGTH - 6) + 0
            if (pending == 0 || val < cur) cur = val
            pending++
            if (pending == a_sample) { print cur; pending = 0 }
        }
        # Flush whatever is left of the last, incomplete group.
        END { if (pending > 0) print cur }
    ' < "${local_file}" > "${local_file}.txt"
}


# Compute and log when a process was started and how long it has been running.
# Globals:   startTime (written) - start time of the process in epoch seconds
# Arguments: $1 - PID to inspect
#            $2 - log file to append to; empty means print to stdout
# Returns:   0 on success (also after printing the usage hint for an empty
#            $1), 1 when /proc/<pid>/stat cannot be read or parsed
function GetProcStartTime() {
    local local_PID=$1
    # log file to append to
    local local_log_file=$2
    if [ -z "${local_PID}" ]; then
        echo "usage ${0} pid"
        return 0
    fi

    local stat_path="/proc/${local_PID}/stat"
    local stat_line=""
    if [ ! -r "${stat_path}" ] || ! read -r stat_line < "${stat_path}"; then
        echo "proc ${local_PID}: cannot read ${stat_path}" >&2
        return 1
    fi

    # The command name (field 2) is wrapped in parentheses and may contain
    # spaces, so drop everything up to the last ") " before splitting.
    local -a stat_fields
    read -r -a stat_fields <<< "${stat_line##*) }"
    # starttime is field 22 of the full line, i.e. index 19 once the pid and
    # command-name fields have been removed.
    local start_ticks=${stat_fields[19]}
    if [[ ! "${start_ticks}" =~ ^[0-9]+$ ]]; then
        echo "proc ${local_PID}: unexpected format in ${stat_path}" >&2
        return 1
    fi

    # starttime is expressed in clock ticks; fall back to 100 ticks per second
    # when getconf is unavailable.
    local clk_tck
    clk_tck=$(getconf CLK_TCK 2>/dev/null) || clk_tck=""
    [[ "${clk_tck}" =~ ^[1-9][0-9]*$ ]] || clk_tck=100

    local uptime_s
    read -r uptime_s _ < /proc/uptime
    uptime_s=${uptime_s%%.*}
    local now
    now=$(date +%s)
    startTime=$(( now - (uptime_s - start_ticks / clk_tck) ))

    # elapsed running time, formatted by ps as [[DD-]HH:]MM:SS
    local elapsed
    elapsed=$(ps -o etime= -p "${local_PID}")

    local message
    message="proc ${local_PID} start at : $(date -d "@${startTime}"), runing:${elapsed}"
    if [ -z "${local_log_file}" ]; then
        echo "${message}"
    else
        echo "${message}" >> "${local_log_file}"
    fi
}

# Collect the PIDs of all processes whose command (8th column of `ps -ef`)
# matches the key, then log the start time of each of them.
# Globals:   global_proc_ids (written) - array of matching PIDs
# Arguments: $1 - key, used as a grep pattern
#            $2 - log file forwarded to GetProcStartTime; empty means stdout
function fun_get_latest_pids() {
    local local_keyInfo=$1
    # log file to append to
    local local_log_file=$2
    local pid cmd

    global_proc_ids=()
    # Pre-filter whole ps lines and drop the grep helpers themselves, then
    # confirm the key against the command column alone so that a key can never
    # be satisfied by the digits of a PID.
    while read -r pid cmd; do
        if grep -q -- "${local_keyInfo}" <<< "${cmd}"; then
            global_proc_ids+=("${pid}")
        fi
    done < <(ps -ef | grep -- "${local_keyInfo}" | grep -v grep | awk '{print $2, $8}')

    for pid in "${global_proc_ids[@]}"; do
        GetProcStartTime "${pid}" "${local_log_file}"
    done
}


# Log one sample line for a process: open-descriptor counts by type, VmSize and
# VmRSS from /proc/<pid>/status, and the %CPU column reported by top. When
# /proc/<pid> does not exist a "is dead" notice is logged instead.
# Arguments: $1 - PID to sample
#            $2 - log file to append to; empty means print to stdout
function PrintProcsInfo() {
    local local_PID=$1
    local local_log_file=$2
    local report

    if [ ! -d "/proc/${local_PID}" ]; then
        report="proc ${local_PID} is dead"
    else
        # %CPU is the 9th column of the last line of a one-shot batch-mode top.
        local proc_cpu
        proc_cpu=$(top -b -n 1 -p "${local_PID}" | tail -1 | awk '{ print $9 }')

        # Classify every "<fd> -> <target>" entry by its link target. Lines
        # without an arrow (the leading "total" line of ls -l) are skipped.
        local fd_info
        fd_info=$(ls -l "/proc/${local_PID}/fd" 2>/dev/null | awk '
            BEGIN { sum = 0; sock = 0; efd = 0; ep = 0; fd = 0 }
            {
                arrow = index($0, " -> ")
                if (arrow == 0) next
                target = substr($0, arrow + 4)
                sum++
                if (target ~ /^socket:/) sock++
                else if (target ~ /^anon_inode:.*eventfd/) efd++
                else if (target ~ /^anon_inode:.*eventpoll/) ep++
                else fd++
            }
            END { print "total_fd=" sum, "sock_num=" sock, "eventfd_num=" efd, "eventpool_num=" ep, "file_fd=" fd }')

        # Value and unit are joined without a space, e.g. "123456kB".
        local vm_info
        vm_info=$(awk '
            BEGIN { vm = ""; rss = "" }
            $0 ~ "VmSize" { vm = $2 $3 }
            $0 ~ "VmRSS"  { rss = $2 $3 }
            END { print "VmSize=" vm, "VmRSS=" rss }' < "/proc/${local_PID}/status")

        report="$(date "+%Y-%m-%d %T") pid=${local_PID} ${fd_info} ${vm_info} %CPU=${proc_cpu}"
    fi

    if [ -z "${local_log_file}" ]; then
        echo "${report}"
    else
        echo "${report}" >> "${local_log_file}"
    fi
}

# Watch every process matching a key: resolve the PIDs once, then sample each
# of them in an endless loop, sleeping ${sleep_idle} seconds before each round.
# Globals:   global_proc_ids (read), sleep_idle (read),
#            global_procInfoInt (read), global_outSeq (written)
# Arguments: $1 - key identifying the processes to watch
#            $2 - log file; empty selects "<key>.watch", the literal "stdout"
#                 prints to the screen
# Returns:   exits 0 when the key is empty; returns 1 when no process matches
#            the key; otherwise loops until the script is terminated
function fun_watch_fd() {
    local local_keyInfo=$1
    local local_log_file=$2
    local pid

    if [ -z "${local_keyInfo}" ]; then
        echo "key is empty"
        exit 0
    fi
    case "${local_log_file}" in
        "")     local_log_file="${local_keyInfo}.watch" ;;  # default: named after the key
        stdout) local_log_file="" ;;                        # empty means "print to stdout"
    esac

    fun_get_latest_pids "${local_keyInfo}" "${local_log_file}"
    # The PID list is never refreshed, so with no match there is nothing to
    # sample; report it and stop.
    if [ "${#global_proc_ids[@]}" -eq 0 ]; then
        echo "no process matches key: ${local_keyInfo}" >&2
        return 1
    fi

    while true; do
        sleep "${sleep_idle}"
        global_outSeq=$((global_outSeq + 1))
        for pid in "${global_proc_ids[@]}"; do
            PrintProcsInfo "${pid}" "${local_log_file}"
            # Periodically repeat the start-time line.
            if (( global_outSeq % global_procInfoInt == 0 )); then
                GetProcStartTime "${pid}" "${local_log_file}"
            fi
        done
    done
}

# Print the command-line usage summary to stdout.
# Arguments: none are used; callers may pass the script name.
function help() {
    printf '%s\n' \
        "用法:" \
        "    脚本名         操作类型     操作参数" \
        "    ${0}  watch        proc_key  [ log_file/stdout ]" \
        "    ${0}  deal_log_rss log_file" \
        "参数说明:" \
        "    watch             # 执行脚本资源监控功能" \
        "    deal_log_rss      # 执行脚本日志处理功能(处理watch输出的日志文件中VmRSS数据)" \
        "    log_file          # 日志文件全路径" \
        "    stdout            # 输出到屏幕"
}

# Entry point: dispatch on the operation named by the first argument.
# Arguments are forwarded quoted so that keys and paths containing spaces
# reach the handlers as single words.
process_type=$1
case "${process_type}" in
    watch)
        fun_watch_fd "${2}" "${3}"
        ;;
    deal_log_rss)
        logfile=${2}
        if [ -z "${logfile}" ]; then
            # no log file given: show the usage text
            help "${0}"
            exit 0
        fi
        deal_log_rss "${logfile}"
        ;;
    *)
        # unknown or missing operation: show the usage text
        help "${0}"
        ;;
esac
